# -*- coding: UTF-8 -*-
import re
from business.spider import SpiderHelp
from tool.StringTools import StringTools
from dao.mysql import Mysql
import time

# Search endpoint of the Chinese central-government procurement site
# (ccgp.gov.cn); queried with GET parameters built in ccgp() below.
url = "http://search.ccgp.gov.cn/bxsearch"

def ccgp():
    """Crawl procurement-notice search results from ccgp.gov.cn and store
    each notice page's raw HTML in the `raw_source` table.

    Walks every result page of the search, follows the links whose URL
    contains 'cggg', downloads each linked page and inserts it via Mysql.
    Side effects: network requests, DB inserts, console progress output.
    """
    spider = SpiderHelp()
    # Query-parameter notes (from the site's search form):
    #   searchtype=1  search by title
    #   bidSort=0     all sorts (value has no filtering effect)
    #   pinMu=3       product category 3
    #   bidType=0     all bid types (open tender, inquiry, ...)
    #   timeType      time-range preset defined by the site (e.g. 4 = last 3 months)
    time_type = 6

    def _payload(page_index):
        # Single place to build the query dict so the size-probe request and
        # the per-page requests cannot drift apart (the original duplicated
        # this literal twice).
        return {"searchtype": 1, "bidSort": 0, "pinMu": 3,
                "page_index": page_index, "bidType": 0,
                "start_time": "2017:11:13", "end_time": "2017:11:13",
                "timeType": time_type}

    # Probe request: read the total page count out of the pager text,
    # which contains "size:<n>,".
    soup = spider.beautifulsoup_byget(url, _payload(1))
    size = int(StringTools.txt_wrap_by("size:", ",",
                                       soup.find("p", class_="pager").text))

    # Notice links have 'cggg' in the URL; compile once, not once per page.
    pattern = re.compile(r'(\w)*cggg(\w)*')
    sql = ("INSERT INTO `raw_source` (`origin_url`, `origin_html`, `status`, `site_id`, `gmt_create`)"
           " VALUES (%s, %s, 0, '1', sysdate())")

    # BUG FIX: the original loop was range(2, size + 1), which silently
    # skipped every link on page 1. Start at 1; re-fetching page 1 is cheap
    # and keeps the loop uniform.
    for page in range(1, size + 1):
        print("第", page, "页,共", size, "页")
        page_soup = spider.beautifulsoup_byget(url, _payload(page))
        for link in spider.target_links(page_soup, pattern):
            mysql = Mysql()
            # Fetch the notice page itself with a fresh helper, into its own
            # name (the original rebound `soup`, shadowing the page soup).
            detail_soup = SpiderHelp().beautifulsoup_byget(link)
            row = mysql.insertOne(sql, (link, detail_soup))
            mysql.dispose(1)
            print(link, ",第", row, "行执行完成")
        # Be polite to the server between result pages.
        time.sleep(1)

if __name__ == "__main__":
    # Run the crawler only when executed as a script, not when imported.
    ccgp()